import pandas as pd
import datetime as dt
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import plotly.offline as pyo
import quandl
import yfinance as yf
import pylab
import seaborn as sns #built on top of matplotlib and is useful in providing useful interface for showing results
from datetime import datetime #for data retrival of MnC's finances
from pandas_datareader import data
# Use plotly as the pandas plotting backend and enable inline plotly output.
pd.options.plotting.backend = 'plotly'
# BUG FIX: `connected` expects a bool; the original passed the string 'True'
# (truthy, so it happened to work, but it is not the documented API).
pyo.init_notebook_mode(connected=True)
Here we imported all the packages needed for the analysis.
# Set the beginning and ending dates of the analysis window.
today = datetime.now()
# Start date: exactly one year before the current date.
year_ago = datetime(today.year - 1, today.month, today.day)
Next we set the current datetime to today's and the time duration to 1year
# Six companies chosen for the analysis; load each ticker's historical
# OHLCV data from a CSV exported from Yahoo Finance.
company_list = ['TWTR', 'MET', 'MSFT', 'AMZN', 'EBAY', 'NFLX']
TWTR, MET, MSFT, AMZN, EBAY, NFLX = (
    pd.read_csv(ticker + ".csv") for ticker in company_list
)
Next, we fetch the financial data of the several companies we are interested in.
TWTR.head(5)
#fIrst five rows of twitter's finance dataset
#To see how it is structured
| Date | Low | Open | Volume | High | Close | Adjusted Close | |
|---|---|---|---|---|---|---|---|
| 0 | 07-11-2013 | 44.000000 | 45.099998 | 117701670.0 | 50.090000 | 44.900002 | 44.900002 |
| 1 | 08-11-2013 | 40.685001 | 45.930000 | 27925307.0 | 46.939999 | 41.650002 | 41.650002 |
| 2 | 11-11-2013 | 39.400002 | 40.500000 | 16113941.0 | 43.000000 | 42.900002 | 42.900002 |
| 3 | 12-11-2013 | 41.830002 | 43.660000 | 6316755.0 | 43.779999 | 41.900002 | 41.900002 |
| 4 | 13-11-2013 | 40.759998 | 41.029999 | 8688325.0 | 42.869999 | 42.599998 | 42.599998 |
It shows the first 5 rows of the financial record of the chosen stock; it helps us to analyze how the data is categorised.
MET.head(5)
| Date | Low | Open | Volume | High | Close | Adjusted Close | |
|---|---|---|---|---|---|---|---|
| 0 | 05-04-2000 | 12.756239 | 12.923351 | 77084766 | 14.037433 | 13.814617 | 8.351951 |
| 1 | 06-04-2000 | 13.591800 | 13.703209 | 17167722 | 14.148841 | 13.870321 | 8.385628 |
| 2 | 07-04-2000 | 13.480392 | 13.703209 | 15131516 | 13.870321 | 13.647504 | 8.250919 |
| 3 | 10-04-2000 | 13.591800 | 13.758913 | 6324938 | 13.814617 | 13.758913 | 8.318274 |
| 4 | 11-04-2000 | 13.424688 | 13.647504 | 6087860 | 13.703209 | 13.480392 | 8.149890 |
We do the same for all stocks we choose
MSFT.head(5)
| Date | Low | Open | Volume | High | Close | Adjusted Close | |
|---|---|---|---|---|---|---|---|
| 0 | 13-03-1986 | 0.088542 | 0.088542 | 1031788800 | 0.101563 | 0.097222 | 0.060809 |
| 1 | 14-03-1986 | 0.097222 | 0.097222 | 308160000 | 0.102431 | 0.100694 | 0.062980 |
| 2 | 17-03-1986 | 0.100694 | 0.100694 | 133171200 | 0.103299 | 0.102431 | 0.064067 |
| 3 | 18-03-1986 | 0.098958 | 0.102431 | 67766400 | 0.103299 | 0.099826 | 0.062437 |
| 4 | 19-03-1986 | 0.097222 | 0.099826 | 47894400 | 0.100694 | 0.098090 | 0.061351 |
AMZN.head(5)
| Date | Low | Open | Volume | High | Close | Adjusted Close | |
|---|---|---|---|---|---|---|---|
| 0 | 15-05-1997 | 0.096354 | 0.121875 | 1443120000 | 0.125000 | 0.097917 | 0.097917 |
| 1 | 16-05-1997 | 0.085417 | 0.098438 | 294000000 | 0.098958 | 0.086458 | 0.086458 |
| 2 | 19-05-1997 | 0.081250 | 0.088021 | 122136000 | 0.088542 | 0.085417 | 0.085417 |
| 3 | 20-05-1997 | 0.081771 | 0.086458 | 109344000 | 0.087500 | 0.081771 | 0.081771 |
| 4 | 21-05-1997 | 0.068750 | 0.081771 | 377064000 | 0.082292 | 0.071354 | 0.071354 |
EBAY.head(5)
| Date | Low | Open | Volume | High | Close | Adjusted Close | |
|---|---|---|---|---|---|---|---|
| 0 | 24-09-1998 | 0.775989 | 0.938201 | 518656090 | 0.951354 | 0.830791 | 0.783200 |
| 1 | 25-09-1998 | 0.721188 | 0.797910 | 121255834 | 0.808870 | 0.786949 | 0.741870 |
| 2 | 28-09-1998 | 0.806678 | 0.832983 | 89772883 | 0.892168 | 0.846135 | 0.797665 |
| 3 | 29-09-1998 | 0.837367 | 0.865863 | 39289536 | 0.881208 | 0.857095 | 0.807997 |
| 4 | 30-09-1998 | 0.758453 | 0.827502 | 44113766 | 0.841751 | 0.790237 | 0.744969 |
NFLX.head(5)
| Date | Low | Open | Volume | High | Close | Adjusted Close | |
|---|---|---|---|---|---|---|---|
| 0 | 23-05-2002 | 1.145714 | 1.156429 | 104790000 | 1.242857 | 1.196429 | 1.196429 |
| 1 | 24-05-2002 | 1.197143 | 1.214286 | 11104800 | 1.225000 | 1.210000 | 1.210000 |
| 2 | 28-05-2002 | 1.157143 | 1.213571 | 6609400 | 1.232143 | 1.157143 | 1.157143 |
| 3 | 29-05-2002 | 1.085714 | 1.164286 | 6757800 | 1.164286 | 1.103571 | 1.103571 |
| 4 | 30-05-2002 | 1.071429 | 1.107857 | 10154200 | 1.107857 | 1.071429 | 1.071429 |
TWTR.info()
#column datatypes, data stored in dataframe, gives a brief summary of the insights
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2289 entries, 0 to 2288 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 2289 non-null object 1 Low 2259 non-null float64 2 Open 2259 non-null float64 3 Volume 2259 non-null float64 4 High 2259 non-null float64 5 Close 2259 non-null float64 6 Adjusted Close 2259 non-null float64 dtypes: float64(6), object(1) memory usage: 125.3+ KB
info() gives us more insights into the dataset, its dimensions, its class, number of entries etc
# Plot Twitter's adjusted closing price over its full history.
plt.figure(figsize=(15, 7))
plt.plot(TWTR['Adjusted Close'])
plt.ylabel('Price')
plt.xlabel('Date')
plt.grid()
plt.show()
Here our main focus is on "Adjusted Close" section of the data, it means the cash value of the last transacted price before the market closes. The adjusted closing price is attributed to anything that would affect the stock price after the market closes for the day.
# Compare the first and last available closing prices.
# IMPROVEMENT: index the non-null values positionally instead of hard-coding
# row 2258 (the last non-null Close row); this keeps working if the CSV is
# refreshed with more rows.
close_series = TWTR.Close.dropna()
print('First', close_series.iloc[0], 'End', close_series.iloc[-1])
First 44.900001525878906 End 53.70000076293945
# Log returns are time-additive, which makes them convenient to aggregate.
price_ratio = TWTR['Close'] / TWTR['Close'].shift()
log_returns = np.log(price_ratio).dropna()  # dropna removes the leading NaN
log_returns
1 -0.075136
2 0.029570
3 -0.023586
4 0.016568
5 0.047896
...
2254 -0.049849
2255 0.032150
2256 0.024162
2257 0.010742
2258 0.006539
Name: Close, Length: 2258, dtype: float64
log_returns.mean()
7.926270460146603e-05
TWTR.Close[0]*(np.exp(log_returns.mean()*len(log_returns)))
53.7000007629395
As we can see, it accurately recovered the closing price of the time series. This can't be done with simple returns if we want to model returns using the normal distribution! Simple returns: the product of normally distributed variables is not normally distributed. Log returns: the sum of normally distributed variables follows a normal distribution.
# Let's calculate the same using simple returns and see how it compares.
# pct_change() computes (P_t - P_{t-1}) / P_{t-1} for each row.
simple_returns=TWTR.Close.pct_change().dropna()
# we also dropped the NaNs (the first row has no previous price) using dropna()
simple_returns
1 -0.072383
2 0.030012
3 -0.023310
4 0.016706
5 0.049061
...
2284 0.000000
2285 0.000000
2286 0.000000
2287 0.000000
2288 0.000000
Name: Close, Length: 2288, dtype: float64
simple_returns.mean()
0.0006528994280158439
TWTR.Close[0]*(np.prod([(1+Rt) for Rt in simple_returns]))
53.70000076293928
As we see, with simple returns the prediction is close to the actual closing price but not exactly equal; it is also much less intuitive to follow through.
#Histogram of log returns
log_returns.plot(kind='hist')
As we can see it's mostly centered around 0, looks a bit normally distributed. But there is quite a catch here is normality a good assumption for financial data? The assumption that prices or more accurately log returns are normally distributed.
# Take the worst and best daily log returns and express each as a z-score
# (number of standard deviations from the mean) under a normality assumption.
# IMPROVEMENT: Series.min()/max() find the extremes in one O(n) pass each —
# no need to copy the series to a list and sort it.
worst = log_returns.min()
best = log_returns.max()
mean_lr = log_returns.mean()
std_lr = log_returns.std()
std_worst = (worst - mean_lr) / std_lr
std_best = (best - mean_lr) / std_lr
print('Std dev. worst %.2f best %.2f' % (std_worst, std_best))
Std dev. worst -8.07 best 7.00
As we can see the deviation is huge: in a normal distribution about 99.7% of the data lies within 3 standard deviations of the mean, which is just not the case here.
Q-Q or Quantile-Quantile Plots: a Q-Q plot plots two sets of quantiles against one another, i.e. theoretical quantiles against the actual quantiles of the variable.
stats.probplot(log_returns, dist='norm', plot=pylab)
print('Q-Q PLOT')
Q-Q PLOT
As we can see from here normally treating financial data as normally distributed is not a bad assumption for the most part, except for the tails. Which we can see from the plot as well, at the tails and heads there seems a deviation from normality.
MSFT['Adjusted Close'].plot()
We do the same for the other assets as well; it helps us analyse how each one varies over the days.
NFLX['Adjusted Close'].plot()
MET['Adjusted Close'].plot()
EBAY['Adjusted Close'].plot()
We then incorporate moving averages to eliminate fluctuations and this process is called smoothing of time series. It reduces the amount of variations present in the data. The main advantage of SMA is that it offers a smoothed line, less prone to whipsawing. Its often favoured by traders operating on longer time frames such as daily or weekly charts
# Rolling-window sizes (in days) for the simple moving averages.
MA_days = [10, 20, 30]
for ma in MA_days:
    ma_str = "MA: {}".format(ma)
    # Add one smoothed column per window size to every company's frame.
    for frame in (TWTR, MET, MSFT, AMZN, NFLX, EBAY):
        frame[ma_str] = frame['Adjusted Close'].rolling(ma).mean()
This code calculates the moving averages (MA) for a given set of windows (10, 20, and 30 days) over the 'Adjusted Close' price data.
def plot_graphic(company, company_string):
    """Plot a company's adjusted close together with its 10/20/30-day SMAs.

    Parameters
    ----------
    company : pandas.DataFrame
        Price history containing 'Adjusted Close' and the precomputed
        'MA: 10' / 'MA: 20' / 'MA: 30' columns.
    company_string : str
        Company name, used as the plot title.
    """
    columns = ['Adjusted Close', 'MA: 10', 'MA: 20', 'MA: 30']
    plt.figure(figsize=(15, 6))
    for col in columns:
        plt.plot(company[col])
    plt.title(company_string)
    plt.xlabel('Date')
    plt.ylabel('Price')
    # BUG FIX: the legend previously read 'MA:30' (missing space), which did
    # not match the plotted column label 'MA: 30'.
    plt.legend(columns)
    plt.grid()
    plt.show()
For a specified company's stock market data, the code defines a function called plot_graphic, which creates a graph illustrating the adjusted closing prices and three moving averages (MA: 10, MA: 20, and MA: 30). The function takes two parameters: company, which represents the data for the company, and company_string, which represents the company's name. An effective way to visualize adjusted closing prices and moving averages of a specific company's stock market data is to use the plot_graphic function. Over time, it makes it easier to analyze and compare price trends. The more days we incorporate the more smoothing we get, which can be seen in the graph below.
# BUG FIX: data_list must be in the same order as company_list — the frames
# are paired with the names positionally here AND again when stock_returns is
# built below, so the original order [TWTR, MET, EBAY, NFLX, MSFT, AMZN]
# mislabeled the plot titles and every downstream returns column.
data_list = [TWTR, MET, MSFT, AMZN, EBAY, NFLX]
for frame, name in zip(data_list, company_list):
    plot_graphic(frame, name)
Data sets for different companies are iterated through in the data_list. The plot_graphic function is invoked for each data set, with the corresponding company data and name provided as input. As a result, distinct graphs are created for each company, which allow visualization of their adjusted closing prices and moving averages. As you can see, the red line gives more smoothing than the blue line, which in turn gives more smoothing than the green line.
# Compute each company's daily returns and plot their distribution.
for frame, name in zip(data_list, company_list):
    frame['Daily Returns'] = frame['Adjusted Close'].pct_change()
    sns.displot(frame['Daily Returns'].dropna(), bins=50, color='blue', kde=True)
    plt.title(name)
    plt.show()
This code snippet calculates the daily returns for each company within the data_list and visualizes the distribution of these returns using a histogram. On observing we see it follows almost a normal distribution, we say “almost” as the head and tail parts of the histogram (upon which we did smoothening to arrive at this conclusion) don’t follow Gaussian distribution very strictly (why? And how can we say this? I have explained this in my talk of normality section). And the smoothening has been achieved due to the usage of moving average, as histogram itself is a discrete distribution and in order to achieve a continuous distribution moving average provides a good means for visualization.
import pandas as pd
# Combine every company's 'Daily Returns' series into one DataFrame,
# one column per company; `keys=` names the columns after company_list.
stock_returns = pd.concat(
    (frame['Daily Returns'] for frame in data_list),
    axis=1,
    keys=company_list,
)
# Display the first few rows of the resulting DataFrame.
stock_returns.head()
| TWTR | MET | MSFT | AMZN | EBAY | NFLX | |
|---|---|---|---|---|---|---|
| 0 | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | -0.072383 | 0.004032 | -0.052771 | 0.011343 | 0.035712 | -0.117028 |
| 2 | 0.030012 | -0.016064 | 0.075209 | -0.043684 | 0.017250 | -0.012040 |
| 3 | -0.023310 | 0.008163 | 0.012953 | -0.046297 | -0.025432 | -0.042685 |
| 4 | 0.016706 | -0.020243 | -0.078005 | -0.029125 | -0.017390 | -0.127392 |
Several operations are carried out using the pandas library in the provided code: First, it imports the pandas library, denoted by import pandas as pd. In this way, pandas can be used to manipulate and analyze data. The next step is to create a list called daily_returns_list by using a list comprehension. Each dictionary in data_list is iterated through to retrieve the values associated with the key 'Daily Returns'. Stock_returns is generated by concatenating the pandas Series objects from daily_returns_list along the columns axis using the pd.concat() function. A combined view of the daily returns data for all of the companies is provided by this DataFrame, in which each column represents the daily returns of a different company. Stock_returns.columns = company_list ensures that the column names in stock_returns align with the names of the companies. Lastly, stock_returns.head() is used to display the first few rows of the resulting DataFrame. A list of dictionaries containing daily returns data for different companies is processed by this code. It allows comparison and analysis of the daily returns across companies by concatenating them into a single DataFrame. Stock_returns, the resulting DataFrame, consolidates the daily returns by company, with each column denoting a specific company.
sns.pairplot(stock_returns.dropna())
<seaborn.axisgrid.PairGrid at 0x162a45572d0>
In this code snippet, sns.pairplot(stock_returns.dropna()) uses the Seaborn library to create a scatter plot matrix based on the stock_returns DataFrame. A breakdown of the code can be found here: The sns.pairplot() function produces a grid of scatter plots that display pairwise comparisons of variables within a dataset. The stock_returns DataFrame will be plotted with pairplot(). The stock_returns.dropna() operation ensures that the scatter plot matrix contains complete data. The dropna() function removes any rows in stock_returns that contain missing values, denoted as NaN. Seaborn's pairplot() function is used to create a scatter plot matrix. In this matrix, you can see how variables within the stock_returns DataFrame are related pairwise. Dropna() ensures that data for scatter plots is complete. The resulting plot can provide insight into the correlations and patterns between the daily returns of different companies.
# Correlation heatmap of the daily returns; mask the redundant upper triangle
# (the matrix is symmetric).
corr = stock_returns.corr()
upper_triangle = np.triu(np.ones_like(corr, dtype=bool))
plt.figure(figsize=(10, 10))
sns.heatmap(corr, mask=upper_triangle, square=True, linewidths=.5, annot=True)
plt.show()
A heatmap plot is generated to visualize the correlations between stock_returns and the correlation matrix created in the provided code snippet.
mean_income = stock_returns.mean() # Mean daily return for each stock
cov_returns = stock_returns.cov() # Covariance matrix of daily returns
count = len(stock_returns.columns) # Number of assets in the universe
print(mean_income, cov_returns, sep='\n')
TWTR 0.000653
MET 0.000703
MSFT 0.001160
AMZN 0.001730
EBAY 0.001128
NFLX 0.001701
dtype: float64
TWTR MET MSFT AMZN EBAY NFLX
TWTR 0.001138 -0.000033 0.000013 0.000015 -0.000014 0.000049
MET -0.000033 0.000650 0.000019 0.000002 -0.000007 -0.000017
MSFT 0.000013 0.000019 0.001031 0.000005 0.000011 -0.000005
AMZN 0.000015 0.000002 0.000005 0.001295 0.000015 0.000006
EBAY -0.000014 -0.000007 0.000011 0.000015 0.000455 0.000004
NFLX 0.000049 -0.000017 -0.000005 0.000006 0.000004 0.001311
def randomPortfolio(n=None):
    """Return random long-only portfolio weights that sum to 1.

    Draws standard-normal samples, exponentiates them (guaranteeing strictly
    positive values), and normalises by the total.

    Parameters
    ----------
    n : int, optional
        Number of assets. Defaults to the module-level ``count``
        (GENERALIZATION: the original always read the global).
    """
    if n is None:
        n = count
    share = np.exp(np.random.randn(n))
    return share / share.sum()
randomPortfolio() is responsible for generating a random portfolio of shares. The randomPortfolio() function generates a random portfolio of shares by drawing random values from a standard normal distribution, exponentiating them to ensure positive values, and then normalizing them to represent proportions of the total portfolio value. By calling this function, you can obtain a random allocation of shares for a portfolio.
def IncomePortfolio(Rand, mean_returns=None):
    """Expected portfolio return: weights dotted with mean asset returns.

    GENERALIZATION: `mean_returns` defaults to the module-level `mean_income`
    values, matching the original behavior, but can be passed explicitly.
    """
    if mean_returns is None:
        mean_returns = mean_income.values
    return np.matmul(np.asarray(mean_returns), Rand)

def RiskPortfolio(Rand, cov=None):
    """Portfolio volatility: sqrt(w^T . Cov . w).

    GENERALIZATION: `cov` defaults to the module-level `cov_returns` values,
    matching the original behavior, but can be passed explicitly.
    """
    if cov is None:
        cov = cov_returns.values
    return np.sqrt(np.matmul(np.matmul(Rand, np.asarray(cov)), Rand))
The provided code includes two functions: IncomePortfolio(Rand) and RiskPortfolio(Rand). These functions are designed to perform calculations related to income and risk for a portfolio. the IncomePortfolio(Rand) function calculates the expected income of a portfolio based on the mean income values and the allocation of assets. The RiskPortfolio(Rand) function, on the other hand, calculates the risk of a portfolio based on the covariance matrix of returns and the allocation of assets. Together, these functions provide essential metrics for evaluating the income and risk characteristics of a portfolio
# Simulate many random portfolios, recording the risk and income of each.
combinations = 10000
risk = np.zeros(combinations)
income = np.zeros(combinations)
portfolio = np.zeros((combinations, count))
for i in range(combinations):
    weights = randomPortfolio()
    portfolio[i, :] = weights
    risk[i] = RiskPortfolio(weights)
    income[i] = IncomePortfolio(weights)
First, the variable "combinations" is initialized to 10000. This variable determines the number of portfolio combinations that will be generated and evaluated Next, three arrays named "risk," "income," and "portfolio" are created and initialized with zeros. These arrays will store the risk, income, and portfolio data for each combination. Within the loop, the function "randomPortfolio()" is called to generate a random portfolio. It generates a set of shares along with their corresponding weights. The randomly generated portfolio is then assigned to the ith row of the "portfolio" array. Each row in the "portfolio" array represents a different combination of shares. The "RiskPortfolio()" function is invoked, passing the current portfolio as an argument. This function calculates the risk associated with the given portfolio.The calculated risk value is assigned to the ith element of the "risk" array. The "IncomePortfolio()" function is called with the current portfolio as an argument. This function computes the income or expected return of the portfolio.The calculated income value is then assigned to the ith element of the "income" array.
The ratio is the average return earned in excess of the risk-free rate per unit of volatility or total risk. Volatility is a measure of the price fluctuations of an asset or portfolio. The risk-free rate of return is the return on an investment with zero risk, meaning it’s the return investors could expect for taking no risk. The optimal risky portfolio is the one with the highest Sharpe ratio.
# Scatter every simulated portfolio in risk/income space (both scaled to
# percent) and highlight the one with the highest income/risk ratio.
plt.figure(figsize=(15, 8))
plt.scatter(risk * 100, income * 100, c="b", marker=".")
plt.title("Portfolios")
plt.xlabel("Risk")
plt.ylabel("Income")
MaxSharpRatio = np.argmax(income / risk)
plt.scatter([risk[MaxSharpRatio] * 100], [income[MaxSharpRatio] * 100],
            c="r", marker="o", label="Max Sharp ratio")
plt.legend()
plt.show()
The provided code snippet utilizes the matplotlib library to create a scatter plot visualizing the relationship between risk and income, which were calculated in the previous code. It generates a scatter plot where the x-axis represents the risk and the y-axis represents the income for multiple portfolios. It further highlights the portfolio with the maximum Sharpe ratio by adding a red dot at its corresponding risk and income values and including a legend to identify it. The scatter plot provides a visual representation of the risk and income relationship for the portfolios.
# Show the per-asset weights of the maximum-Sharpe portfolio.
best_port = portfolio[MaxSharpRatio]
for ticker, weight in zip(company_list, best_port):
    print("{} : {}".format(ticker, weight))
TWTR : 0.09421099274093087 MET : 0.11835145975660954 MSFT : 0.12276042643875588 AMZN : 0.16256342310905778 EBAY : 0.32309664747213723 NFLX : 0.1790170504825088
It identifies the portfolio with the highest Sharpe ratio and then displays the allocation or weight assigned to each company in that portfolio. It allows us to see how the assets or companies are distributed within the best-performing portfolio.
days = 365
# NOTE(review): this rebinds `dt`, clobbering the `import datetime as dt`
# alias from the top of the file — any later `dt.<something>` datetime call
# would fail. Kept as-is because monte_carlo() below reads this global.
dt = 1 / days
# here I have divided by 365 but usually we do by 252 (the number of trading days)
stock_returns.dropna(inplace=True)
mu = stock_returns.mean()  # mean daily return per stock
sigma = stock_returns.std()  # standard deviation of daily returns per stock
It calculates the average daily returns (mu) and the standard deviation of daily returns (sigma) for a stock based on the available data. These values are commonly used in financial analysis and risk assessment to understand the historical performance and volatility of a stock.
def monte_carlo(start_price, days, mu, sigma, dt=None):
    """Simulate one price path using arithmetic-Brownian-motion daily steps.

    Each day's price change is price * (drift + shock), with drift = mu*dt
    and shock drawn from N(mu*dt, sigma*sqrt(dt)).

    Parameters
    ----------
    start_price : float
        Initial price (day 0).
    days : int
        Number of simulated days (length of the returned path).
    mu, sigma : float
        Mean and standard deviation of the daily returns.
    dt : float, optional
        Time step; defaults to 1/days, which matches the module-level global
        used by the original. (BUG FIX: the original silently read the global
        `dt`, which also shadowed the `datetime` import alias.)

    Returns
    -------
    numpy.ndarray
        Simulated prices, length `days`, with price[0] == start_price.
    """
    if dt is None:
        dt = 1 / days
    price = np.zeros(days)
    price[0] = start_price
    shock = np.zeros(days)
    drift = np.zeros(days)
    for x in range(1, days):
        shock[x] = np.random.normal(loc=mu * dt, scale=sigma * np.sqrt(dt))
        drift[x] = mu * dt
        price[x] = price[x - 1] + (price[x - 1] * (drift[x] + shock[x]))
    return price
The provided code snippet introduces a function named monte_carlo that employs the Monte Carlo method to simulate the future price of a stock. The monte_carlo function uses the Monte Carlo method to generate simulated stock prices for a specified number of days. It considers the initial stock price, average daily return, and standard deviation of daily returns. The function incorporates random shocks and drift components to calculate the simulated prices for each day. For the generation of random paths, I have used arithmetic Brownian motion, instead of this one can use geometric Brownian motion too. The more the variance the more the spread is, and the less the steepness is.In context to Monte Carlo Simulation, the random paths generated would be less differentiating if the variance is less and it would be more if the variance is more resulting in a flatter curve.
# Monte Carlo analysis for Twitter: 1000 simulated price paths.
start_price = 35.65
runs = 1000
sim = np.zeros(runs)
plt.figure(figsize=(15, 8))
# BUG FIX: the original looped range(10000) while `sim` has only 1000 slots,
# raising IndexError at i == 1000 (see the traceback in the original output).
for i in range(runs):
    result = monte_carlo(start_price, days, mu['TWTR'], sigma['TWTR'])
    sim[i] = result[days - 1]  # keep only each path's final price
    plt.plot(result)
plt.xlabel('Days')
plt.ylabel('Price')
plt.title('Monte Carlo analysis for Twitter')
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) Cell In[172], line 7 5 for i in range(10000): 6 result = monte_carlo(start_price, days, mu['TWTR'], sigma['TWTR']) ----> 7 sim[i] = result[days - 1] 8 plt.plot(result) 10 plt.xlabel('Days') IndexError: index 1000 is out of bounds for axis 0 with size 1000
The code snippet in question carries out a Monte Carlo analysis for Twitter's stock, which is denoted by the ticker symbol 'TWTR'. It commences by assigning the initial stock price of Twitter at $35.65, as indicated by the line "start_price = 35.65". The next line, "sim = np.zeros(1000)", establishes an array named 'sim' composed of 1000 zeros. These zeros are placeholders for the final predicted stock prices following the Monte Carlo simulations. This code performs a Monte Carlo analysis of Twitter's stock by predicting its future prices through 1000 simulations. The final prices from these simulations are stored in the 'sim' array and plotted. By doing so, the code offers potential insights into the future price range of Twitter's stock, as determined by the Monte Carlo simulations.
plt.figure(figsize=(10, 7))
plt.hist(sim, bins=100)
plt.figtext(0.6, 0.7, "Mean: {} \nStd: {} \nStart Price: {}".format(sim.mean(), sim.std(), start_price))
plt.show()
The provided code constructs a histogram to illustrate the distribution of the Twitter stock's simulated prices derived from the Monte Carlo simulations. The visualization includes text annotations that outline the mean, standard deviation, and initial price of the simulated prices. This visual representation provides valuable insights into the potential range and traits of Twitter's future stock prices, as per the Monte Carlo simulations.
# Monte Carlo analysis for eBay: 1000 simulated price paths.
# NOTE(review): a start price of 907.34 looks implausible for EBAY — possibly
# copied from another ticker; verify against the loaded data.
start_price = 907.34
sim = np.zeros(1000)
plt.figure(figsize=(15, 8))
for i in range(1000):
    result = monte_carlo(start_price, days, mu['EBAY'], sigma['EBAY'])
    sim[i] = result[days - 1]  # keep only each path's final price
    plt.plot(result)
plt.xlabel('Days')
plt.ylabel('Price')
plt.title('Monte Carlo analysis for EBAY')
Text(0.5, 1.0, 'Monte Carlo analysis for EBAY')
plt.figure(figsize=(10, 7))
plt.hist(sim, bins=100)
plt.figtext(0.6, 0.7, "Mean: {} \nStd: {} \nStart Price: {}".format(sim.mean(), sim.std(), start_price))
plt.show()
start_price = 300.95
sim = np.zeros(1000)
plt.figure(figsize=(15, 8))
for i in range(1000):
result = monte_carlo(start_price, days, mu['MSFT'], sigma['MSFT'])
sim[i] = result[days - 1]
plt.plot(result)
plt.xlabel('Days')
plt.ylabel('Price')
plt.title('Monte Carlo analysis for Microsoft')
Text(0.5, 1.0, 'Monte Carlo analysis for Microsoft')
plt.figure(figsize=(10, 7))
plt.hist(sim, bins=100)
plt.figtext(0.6, 0.7, "Mean: {} \nStd: {} \nStart Price: {}".format(sim.mean(), sim.std(), start_price))
plt.show()
# Monte Carlo analysis for Amazon: 1000 simulated price paths.
# NOTE(review): start_price 300.95 is identical to the Microsoft cell above —
# looks like a copy-paste leftover; confirm Amazon's actual starting price.
start_price = 300.95
sim = np.zeros(1000)
plt.figure(figsize=(15, 8))
for i in range(1000):
    result = monte_carlo(start_price, days, mu['AMZN'], sigma['AMZN'])
    sim[i] = result[days - 1]  # keep only each path's final price
    plt.plot(result)
plt.xlabel('Days')
plt.ylabel('Price')
plt.title('Monte Carlo analysis for Amazon')
Text(0.5, 1.0, 'Monte Carlo analysis for Amazon')
plt.figure(figsize=(10, 7))
plt.hist(sim, bins=100)
plt.figtext(0.6, 0.7, "Mean: {} \nStd: {} \nStart Price: {}".format(sim.mean(), sim.std(), start_price))
plt.show()
We did the same analysis for other stocks as well, which we discussed in detail for Twitter.
We can also extend this discussion to include risk factor along with it to incorporate more diversity
# lets import more data
# Download five years of daily data for four more tickers via yfinance.
df = yf.download(['AAPL', 'NKE', 'GOOGL', 'AMZN'], start='2015-01-01', end='2019-12-31')
df.head()
[*********************100%%**********************] 4 of 4 completed
| Adj Close | Close | High | ... | Low | Open | Volume | |||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AAPL | AMZN | GOOGL | NKE | AAPL | AMZN | GOOGL | NKE | AAPL | AMZN | ... | GOOGL | NKE | AAPL | AMZN | GOOGL | NKE | AAPL | AMZN | GOOGL | NKE | |
| Date | |||||||||||||||||||||
| 2015-01-02 | 24.498680 | 15.4260 | 26.477501 | 43.225178 | 27.332500 | 15.4260 | 26.477501 | 47.514999 | 27.860001 | 15.7375 | ... | 26.393999 | 47.055000 | 27.847500 | 15.6290 | 26.629999 | 48.275002 | 212818400 | 55664000 | 26480000 | 4985800 |
| 2015-01-05 | 23.808517 | 15.1095 | 25.973000 | 42.529240 | 26.562500 | 15.1095 | 25.973000 | 46.750000 | 27.162500 | 15.4190 | ... | 25.887501 | 46.564999 | 27.072500 | 15.3505 | 26.357500 | 47.255001 | 257142000 | 55484000 | 41182000 | 6889200 |
| 2015-01-06 | 23.810755 | 14.7645 | 25.332001 | 42.279072 | 26.565001 | 14.7645 | 25.332001 | 46.474998 | 26.857500 | 15.1500 | ... | 25.277500 | 46.035000 | 26.635000 | 15.1120 | 26.025000 | 46.945000 | 263188400 | 70380000 | 54456000 | 7576000 |
| 2015-01-07 | 24.144642 | 14.9210 | 25.257500 | 43.152390 | 26.937500 | 14.9210 | 25.257500 | 47.435001 | 27.049999 | 15.0640 | ... | 25.182501 | 46.549999 | 26.799999 | 14.8750 | 25.547501 | 46.805000 | 160423600 | 52806000 | 46918000 | 7256000 |
| 2015-01-08 | 25.072321 | 15.0230 | 25.345501 | 44.148537 | 27.972500 | 15.0230 | 25.345501 | 48.529999 | 28.037500 | 15.1570 | ... | 24.750999 | 47.810001 | 27.307501 | 15.0160 | 25.075500 | 47.830002 | 237458000 | 61768000 | 73054000 | 5978200 |
5 rows × 24 columns
# Closing price
df = df['Adj Close']
df.head()
| AAPL | AMZN | GOOGL | NKE | |
|---|---|---|---|---|
| Date | ||||
| 2015-01-02 | 24.498680 | 15.4260 | 26.477501 | 43.225178 |
| 2015-01-05 | 23.808517 | 15.1095 | 25.973000 | 42.529240 |
| 2015-01-06 | 23.810755 | 14.7645 | 25.332001 | 42.279072 |
| 2015-01-07 | 24.144642 | 14.9210 | 25.257500 | 43.152390 |
| 2015-01-08 | 25.072321 | 15.0230 | 25.345501 | 44.148537 |
# Covariance matrix of daily log returns: log(1 + simple return).
cov_matrix = df.pct_change().apply(lambda x: np.log(1+x)).cov()
cov_matrix
| AAPL | AMZN | GOOGL | NKE | |
|---|---|---|---|---|
| AAPL | 0.000246 | 0.000142 | 0.000122 | 0.000084 |
| AMZN | 0.000142 | 0.000334 | 0.000176 | 0.000092 |
| GOOGL | 0.000122 | 0.000176 | 0.000222 | 0.000085 |
| NKE | 0.000084 | 0.000092 | 0.000085 | 0.000219 |
# Yearly returns for individual companies
ind_er = df.resample('Y').last().pct_change().mean()
ind_er
AAPL 0.357854 AMZN 0.295745 GOOGL 0.155478 NKE 0.160666 dtype: float64
# Volatility is given by the annual standard deviation. We multiply by 250 because there are 250 trading days/year.
ann_sd = df.pct_change().apply(lambda x: np.log(1+x)).std().apply(lambda x: x*np.sqrt(250))
ann_sd
AAPL 0.247871 AMZN 0.288770 GOOGL 0.235375 NKE 0.233916 dtype: float64
assets = pd.concat([ind_er, ann_sd], axis=1) # Creating a table for visualising returns and volatility of assets
assets.columns = ['Returns', 'Volatility']
assets
| Returns | Volatility | |
|---|---|---|
| AAPL | 0.357854 | 0.247871 |
| AMZN | 0.295745 | 0.288770 |
| GOOGL | 0.155478 | 0.235375 |
| NKE | 0.160666 | 0.233916 |
p_ret = []      # portfolio expected returns
p_vol = []      # portfolio annualised volatilities
p_weights = []  # asset weight vectors
num_assets = len(df.columns)
num_portfolios = 10000
# Simulate random portfolios: draw weights, normalise them to sum to 1, and
# record each portfolio's expected return and annualised volatility.
for portfolio in range(num_portfolios):
    weights = np.random.random(num_assets)
    weights = weights/np.sum(weights)
    p_weights.append(weights)
    # Expected return: weighted sum of the individual expected returns.
    returns = np.dot(weights, ind_er)
    p_ret.append(returns)
    # Portfolio variance w^T.Cov.w via row/column scaling of the cov matrix.
    var = cov_matrix.mul(weights, axis=0).mul(weights, axis=1).sum().sum()
    sd = np.sqrt(var)            # daily standard deviation
    ann_sd = sd*np.sqrt(250)     # annualised volatility (250 trading days)
    p_vol.append(ann_sd)
data = {'Returns':p_ret, 'Volatility':p_vol}
for counter, symbol in enumerate(df.columns.tolist()):
    # One weight column per asset, aligned with the simulated portfolios.
    data[symbol+' weight'] = [w[counter] for w in p_weights]
# BUG FIX: this line was garbled in the source ("po----…"); it must build the
# `portfolios` DataFrame consumed by every cell below.
portfolios = pd.DataFrame(data)
portfolios.head() # Dataframe of the 100000 portfolios created
| Returns | Volatility | AAPL weight | AMZN weight | GOOGL weight | NKE weight | |
|---|---|---|---|---|---|---|
| 0 | 0.233882 | 0.200596 | 0.142864 | 0.343262 | 0.254882 | 0.258992 |
| 1 | 0.210969 | 0.190450 | 0.138952 | 0.181584 | 0.313098 | 0.366367 |
| 2 | 0.223053 | 0.187068 | 0.313108 | 0.016607 | 0.307785 | 0.362501 |
| 3 | 0.254050 | 0.193420 | 0.427602 | 0.068667 | 0.040378 | 0.463353 |
| 4 | 0.293442 | 0.235887 | 0.260769 | 0.606718 | 0.115565 | 0.016948 |
# Plot efficient frontier (risk/return scatter of all simulated portfolios)
portfolios.plot.scatter(x='Volatility', y='Returns')
# Locate the minimum-volatility portfolio.
min_vol_port = portfolios.iloc[portfolios['Volatility'].idxmin()]
# idxmin() gives the row label of the minimum value in the specified column.
min_vol_port
Returns 0.211624 Volatility 0.186612 AAPL weight 0.242990 AMZN weight 0.034792 GOOGL weight 0.319346 NKE weight 0.402872 Name: 7027, dtype: float64
# plotting the minimum volatility portfolio
plt.subplots(figsize=[10,10])
plt.scatter(portfolios['Volatility'], portfolios['Returns'],marker='o', s=10, alpha=0.3)
plt.scatter(min_vol_port[1], min_vol_port[0], color='r', marker='*', s=500)
<matplotlib.collections.PathCollection at 0x162a8247450>
# Finding the optimal (maximum-Sharpe-ratio) portfolio
rf = 0.01 # risk-free rate used in the Sharpe ratio numerator
optimal_risky_port = portfolios.iloc[((portfolios['Returns']-rf)/portfolios['Volatility']).idxmax()]
optimal_risky_port
Returns 0.325920 Volatility 0.219246 AAPL weight 0.725896 AMZN weight 0.164110 GOOGL weight 0.010004 NKE weight 0.099990 Name: 393, dtype: float64
# Plotting optimal portfolio: all portfolios (blue dots), minimum-volatility
# portfolio (red star), and maximum-Sharpe portfolio (green star).
plt.subplots(figsize=(10, 10))
plt.scatter(portfolios['Volatility'], portfolios['Returns'],marker='o', s=10, alpha=0.3)
plt.scatter(min_vol_port[1], min_vol_port[0], color='r', marker='*', s=500)
plt.scatter(optimal_risky_port[1], optimal_risky_port[0], color='g', marker='*', s=500)
<matplotlib.collections.PathCollection at 0x162a81cf710>